#Replication Code for:
#Bureaucratic Bias or Voter-Side Factors? Testing Competing Explanations for Racial Gaps in Vote-By-Mail Ballot Signature Rejections 
#PRQ
#Signature Match Survey
#Herndon, Oskooii, and Rios

#Clear R
# Removes every object that ls() lists in the global environment before the
# rest of the script runs.
# NOTE(review): this does not detach packages or reset options, so starting a
# fresh R session is still the reliable way to get a clean state.
rm(list=ls())

#Load Packages
library(tidyverse)
library(readr)
library(stargazer)
library(ggthemes)
library(broom)
library(pollster)
library(survey)
library(descr)
library(anesrake)
library(data.table)
library(misty)
library(showtext)
library(sysfonts)


#############
# Read Data #
#############
# Survey responses; the raking weights are created and attached further down
sig_match_data <- read_csv(file = "sig_match_data_no_weights.csv")


###################
# Weight Creation #
###################

# Statewide benchmarks come from the ACS 2016-2020 5-year PUMS:
# https://www2.census.gov/programs-surveys/acs/data/pums/2020/5-Year/
#
# Data dictionary:
# https://www.census.gov/programs-surveys/acs/microdata/documentation.2020.html
# Direct link to dictionary:
# https://www2.census.gov/programs-surveys/acs/tech_docs/pums/data_dict/PUMS_Data_Dictionary_2016-2020.pdf

# Read only the columns needed to build the weighting targets
acs_wa <- fread(
  "psam_p53.csv",
  select = c(
    "PUMA", "AGEP", "SCHL", "SEX",
    "RAC1P", "HISP", "CIT", "PWGTP"
  )
)

# Subset to the citizen voting-age population (CVAP):
# age above 17 and a citizenship code below 5
acs_final <- filter(acs_wa, AGEP > 17 & CIT < 5)


####################################
#Create Target Variables and Recode # 
####################################

# Each recode below yields a numeric code; records that match none of the
# listed conditions are left NA (except White, which defaults to 2).

# Sex: 1 = Male, 2 = Female
acs_final$Female <- with(acs_final, case_when(
  SEX == 1 ~ 1,
  SEX == 2 ~ 2
))

# Age group in ten-year bands
acs_final$Age <- with(acs_final, case_when(
  AGEP < 30 ~ 1,               # 18-29 (the CVAP subset starts at 18)
  AGEP >= 30 & AGEP < 40 ~ 2,  # 30-39
  AGEP >= 40 & AGEP < 50 ~ 3,  # 40-49
  AGEP >= 50 & AGEP < 60 ~ 4,  # 50-59
  AGEP >= 60 & AGEP < 70 ~ 5,  # 60-69
  AGEP >= 70 ~ 6               # 70+
))

# Educational attainment
acs_final$Education <- with(acs_final, case_when(
  SCHL >= 1 & SCHL < 18 ~ 1,   # HS Diploma/GED or less
  SCHL >= 18 & SCHL < 21 ~ 2,  # Some college (up to AA)
  SCHL == 21 ~ 3,              # Bachelor's degree
  SCHL > 21 ~ 4                # MA or higher
))

# Race: 1 = White Non-Hispanic (HISP == 1 means not Hispanic), 2 = Other Race
acs_final$White <- with(acs_final, case_when(
  RAC1P == 1 & HISP == 1 ~ 1,
  TRUE ~ 2
))



###############################
#Create Population Benchmarks #
###############################

# Survey design over the ACS CVAP records, weighted by the person weight PWGTP
svy.acs <- svydesign(ids = ~1, data = acs_final, weights = acs_final$PWGTP)

# Weighted category shares for one variable: tabulate, convert to proportions,
# round to three decimals, and drop the table attributes
acs_share <- function(margin) {
  as.numeric(round(prop.table(svytable(margin, design = svy.acs)), digits = 3))
}

Female <- acs_share(~Female) # Male 0.496  Female 0.504

Age <- acs_share(~Age)
# 18-29: 0.208; 30-39: 0.175; 40-49: 0.155; 50-59: 0.165; 60-69: 0.158; 70+: 0.138

# The rounded Age shares add to .999, but they must add to 1 to use anesrake,
# so the last category is adjusted manually (0.138 -> 0.139)
Age <- c(0.208, 0.175, 0.155, 0.165, 0.158, 0.139)

Education <- acs_share(~Education)
# HS or less: 0.306; Some C: 0.356; BA: 0.218; >BA: 0.120

White <- acs_share(~White) # White: 0.762; Else: 0.238

# Finalize targets: one named entry per weighting variable
targets <- list(
  Female = Female,
  Age = Age,
  Education = Education,
  White = White
)

# Save targets
saveRDS(targets, file = "2016-2020_ACS_WA_Weights.RDS")


####################################
# Use {anesrake} to create weights #
####################################

# Convert the survey data to a base data frame before raking
sig_match_data <- as.data.frame(sig_match_data)

# Temporary numeric case ID (1..n), used only as the anesrake case identifier
sig_match_data$ResponseId2 <- as.numeric(seq_len(nrow(sig_match_data)))

# Rake the sample to the ACS targets
myweights1 <- anesrake(
  targets,
  sig_match_data,
  caseid = sig_match_data$ResponseId2,
  cap = 5,
  type = "nolim"
)

# The first element of the anesrake result holds the weights; store them
sig_match_data$weight <- unlist(myweights1[1])

# Distribution of the weights
hist(sig_match_data$weight)
mean(sig_match_data$weight)
summary(sig_match_data$weight)


# Unweighted frequencies next to weighted toplines for each raking variable
freq(sig_match_data$Female)
topline(df = sig_match_data, variable = Female, weight = weight)

freq(sig_match_data$Age)
topline(df = sig_match_data, variable = Age, weight = weight)

freq(sig_match_data$Education)
topline(df = sig_match_data, variable = Education, weight = weight)

freq(sig_match_data$White)
topline(df = sig_match_data, variable = White, weight = weight)

# The temporary case ID is no longer needed
sig_match_data$ResponseId2 <- NULL




#Normalize All Variables (0-1 Range)--Min-Max Normalization

# Min-max scaling: linearly rescale a vector so its smallest observed value
# becomes 0 and its largest becomes 1.
#
# x: numeric vector, may contain NA.
# Returns a numeric vector of the same length as x. Missing values stay
# missing. A vector whose observed values are all equal is mapped to 0
# (there is no spread to scale by). A vector with no observed values at all
# (empty or all NA) comes back as all NA.
minMax <- function(x) {
  # Nothing observed: skip range(), which would warn and return Inf/-Inf
  if (all(is.na(x))) {
    return(rep(NA_real_, length(x)))
  }
  # compute range ignoring NAs
  rng <- range(x, na.rm = TRUE)
  if (rng[1] == rng[2]) {
    # constant column: observed values map to 0, but NAs must stay NA so
    # missing responses are not silently turned into real zeros
    out <- rep(0, length(x))
    out[is.na(x)] <- NA
    return(out)
  }
  (x - rng[1]) / (rng[2] - rng[1])
}


# Normalize data using function
# Columns 11 through 24 are rescaled in place, by position. Writing the result
# straight back to the same positions avoids a round trip through
# as.data.frame(), which can rename columns (check.names) and would then make
# a name-based write-back target the wrong set of columns.
# NOTE(review): the positions 11:24 are hard-coded; confirm they still cover
# exactly the analysis variables if the input file's column layout changes.
sig_match_data[11:24] <- lapply(sig_match_data[11:24], minMax)
glimpse(sig_match_data) # check results

# Save data
write_csv(sig_match_data, "sig_match_data_with_weights.csv")




#################
# Paired T-Test #
#################

##########
#Table 8 #
##########

#Not Weighted
test.t(sig_match_data$Pct_Latino_Accept, sig_match_data$Pct_White_Accept,
       paired = TRUE, digits = 4)


#Weighted T-test
# NOTE(review): with two vectors supplied, wtd.t.test appears to run a
# two-sample (Welch) comparison rather than a paired test on the differences;
# confirm this is the intended weighted analogue of the paired test above.
weighted_ttest <- weights::wtd.t.test(sig_match_data$Pct_Latino_Accept,
                                      sig_match_data$Pct_White_Accept,
                                      weight = sig_match_data$weight,
                                      samedata = TRUE, drops = "pairwise")
weighted_ttest

# Half-width of the 95% confidence interval for the weighted difference.
# The standard error is read from the test result instead of being copied by
# hand from printed output (the value previously hard-coded was 0.007763237),
# so it stays in sync with the data and weights. `[[` fails loudly if the
# element is ever missing.
weighted_CI <- 1.96 * weighted_ttest$additional[["Std. Err"]]
weighted_CI



##############################
# Weighted Regression Models #
##############################

#################
#Tables A4 + A5 #
#################

# Weighted least-squares models of signature acceptance on explicit bias
# (immigrant measure). "Basic" models use demographics and region; "Full"
# models add party identification and ideology. The raking weights are passed
# through lm's `weights` argument, spelled out in full rather than relying on
# partial matching of `weight=`.

#Latino Accept Basic Model
Latino_Accept_Basic <- lm(
  Pct_Latino_Accept ~ Explicit_Bias +
    Female + Age + Education + Income + White + Central_WA,
  data = sig_match_data, weights = weight
)
summary(Latino_Accept_Basic)

#Latino Accept Full Model
Latino_Accept_Full <- lm(
  Pct_Latino_Accept ~ Explicit_Bias +
    Female + Age + Education + Income + White + Central_WA +
    Republican + Independent + Lib_Con_Ideo,
  data = sig_match_data, weights = weight
)
summary(Latino_Accept_Full)


#White Accept Basic Model
White_Accept_Basic <- lm(
  Pct_White_Accept ~ Explicit_Bias +
    Female + Age + Education + Income + White + Central_WA,
  data = sig_match_data, weights = weight
)
summary(White_Accept_Basic)

#White Accept Full Model
White_Accept_Full <- lm(
  Pct_White_Accept ~ Explicit_Bias +
    Female + Age + Education + Income + White + Central_WA +
    Republican + Independent + Lib_Con_Ideo,
  data = sig_match_data, weights = weight
)
summary(White_Accept_Full)



# Row labels shared by both tables, listed in the order the terms enter the
# full model (the basic model uses the first seven)
tab_labels <- c("Explicit Bias", "Female", "Age", "Education",
                "Income", "White", "Central WA", "Republican", "Independent",
                "Ideology (Lib-Con)")

# Hispanic signature acceptance: basic vs. full weighted model.
# The title previously lacked the closing parenthesis after "Immigrants".
stargazer(Latino_Accept_Basic, Latino_Accept_Full,
          covariate.labels = tab_labels,
          out.header = FALSE,
          dep.var.labels = c("Hispanic Signature Acceptance"),
          omit.stat = c("rsq", "adj.rsq"),
          ci = TRUE, ci.level = 0.95, single.row = TRUE,
          type = "latex",
          out = "table_latino_accept_weighted.tex",
          label = "table_latino_accept_weighted.tex",
          star.char = c("*", "**", "***"),
          star.cutoffs = c(.05, .01, .001),
          notes = "$^{***}$p $<$ .001; $^{**}$p $<$ .01; $^{*}$p $<$ .05",
          notes.append = FALSE,
          title = "Explicit Bias (Immigrants) and Hispanic Signature Acceptance (Weighted)")


# White signature acceptance: basic vs. full weighted model
stargazer(White_Accept_Basic, White_Accept_Full,
          covariate.labels = tab_labels,
          out.header = FALSE,
          dep.var.labels = c("White Signature Acceptance"),
          omit.stat = c("rsq", "adj.rsq"),
          ci = TRUE, ci.level = 0.95, single.row = TRUE,
          type = "latex",
          out = "table_white_accept_weighted.tex",
          label = "table_white_accept_weighted.tex",
          star.char = c("*", "**", "***"),
          star.cutoffs = c(.05, .01, .001),
          notes = "$^{***}$p $<$ .001; $^{**}$p $<$ .01; $^{*}$p $<$ .05",
          notes.append = FALSE,
          title = "Explicit Bias (Immigrants) and White Signature Acceptance (Weighted)")



#####################################
# Regression Models without weights #
#####################################


###########
#Table A6 #
###########

# Unweighted counterparts of the full models (no weights argument)

# Hispanic signature acceptance, unweighted
Latino_Accept_no_weight <- lm(
  Pct_Latino_Accept ~ Explicit_Bias +
    Female + Age + Education + Income + White + Central_WA +
    Republican + Independent + Lib_Con_Ideo,
  data = sig_match_data
)
summary(Latino_Accept_no_weight)


# White signature acceptance, unweighted
White_Accept_no_weight <- lm(
  Pct_White_Accept ~ Explicit_Bias +
    Female + Age + Education + Income + White + Central_WA +
    Republican + Independent + Lib_Con_Ideo,
  data = sig_match_data
)
summary(White_Accept_no_weight)


# Row labels in the order the terms enter the models
tab_labels <- c("Explicit Bias", "Female", "Age", "Education",
                "Income", "White", "Central WA", "Republican", "Independent",
                "Ideology (Lib-Con)")

# Both unweighted models side by side in one table
stargazer(Latino_Accept_no_weight, White_Accept_no_weight,
          covariate.labels = tab_labels,
          out.header = FALSE,
          dep.var.labels = c("Hispanic Signature Acceptance", "White Signature Acceptance"),
          omit.stat = c("rsq", "adj.rsq"),
          ci = TRUE, ci.level = 0.95, single.row = TRUE,
          type = "latex",
          out = "table_Latino_White_Accept_Full_No_Weight.tex",
          label = "table_Latino_White_Accept_Full_No_Weight.tex",
          star.char = c("*", "**", "***"),
          star.cutoffs = c(.05, .01, .001),
          notes = "$^{***}$p $<$ .001; $^{**}$p $<$ .01; $^{*}$p $<$ .05",
          notes.append = FALSE,
          title = "Explicit Bias (Immigrants) and Hispanic and White Signature Acceptance (Unweighted)")




####################
# Plotting Results #
####################

############
#Figure C1 #
############

# Coefficient plot for the two weighted Hispanic-acceptance models.
# Each model is tidied with 95% confidence intervals and tagged with a label.
coef_data_full <- tidy(Latino_Accept_Full, conf.int = TRUE) %>%
  mutate(model = "Model 2")

coef_data_basic <- tidy(Latino_Accept_Basic, conf.int = TRUE) %>%
  mutate(model = "Model 1")


# Stack both models, drop the intercept, and give terms display names
coef_data_combined <- bind_rows(coef_data_full, coef_data_basic) %>%
  dplyr::filter(term != "(Intercept)") %>%
  mutate(term = dplyr::recode(term,
                              "Explicit_Bias" = "Explicit Bias",
                              "Lib_Con_Ideo" = "Liberal-Conservative Ideo",
                              "Central_WA" = "Central WA")) %>%
  mutate(Model = model)

# Register the Google font "EB Garamond" under the R family name "ebgaramond"
# and let showtext render text on all devices
font_add_google("EB Garamond", "ebgaramond")
showtext_auto()


# NOTE(review): the x scale is limited to [-0.25, 0.25]; ggplot2 drops values
# outside scale limits, so confirm no estimate or interval bound exceeds them.
ggplot(coef_data_combined,
       aes(x = estimate,
           y = factor(term, levels = c("Republican", "Independent",
                                       "Liberal-Conservative Ideo",
                                       "Central WA", "White", "Female", "Age",
                                       "Education", "Income",
                                       "Explicit Bias")))) +
  # Point estimates, dodged so the two models do not overlap
  geom_point(aes(shape = Model, fill = Model), size = 3.5,
             position = position_dodge(width = 0.5), color = "black") +
  # Horizontal 95% confidence intervals
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high, linetype = Model),
                 height = 0.3, position = position_dodge(width = 0.5)) +
  # Reference line at zero
  geom_vline(xintercept = 0, linetype = "dotted", color = "black") +
  labs(x = "Coefficient Estimates with 95% Confidence Intervals",
       y = "Predictor Variables",
       title = "Coefficients for Hispanic Signature Acceptance Rate") +
  scale_x_continuous(limits = c(-0.25, 0.25),
                     breaks = seq(-0.25, 0.25, by = 0.05)) +
  scale_shape_manual(values = c("Model 2" = 21, "Model 1" = 24)) +
  scale_fill_manual(values = c("Model 2" = "#E69F00", "Model 1" = "#0072B2")) +
  scale_linetype_manual(values = c("Model 2" = "solid", "Model 1" = "dashed")) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    # Must be the family name the font was registered under ("ebgaramond");
    # asking for "EB Garamond" does not match the registered family
    text = element_text(family = "ebgaramond", size = 20),
    axis.title = element_text(size = 24, face = "bold"),
    axis.text = element_text(size = 24),
    plot.title = element_text(size = 32, face = "bold", hjust = 0.5)
  )

ggsave("Figure_C1.png", width = 20, height = 10,
       dpi = 300,
       bg = "white")



############
#Figure C2 #
############

# Coefficient plot for the two weighted White-acceptance models.
# Each model is tidied with 95% confidence intervals and tagged with a label.
coef_data_full <- tidy(White_Accept_Full, conf.int = TRUE) %>%
  mutate(model = "Model 2")

coef_data_basic <- tidy(White_Accept_Basic, conf.int = TRUE) %>%
  mutate(model = "Model 1")


# Stack both models, drop the intercept, and give terms display names
coef_data_combined <- bind_rows(coef_data_full, coef_data_basic) %>%
  dplyr::filter(term != "(Intercept)") %>%
  mutate(term = dplyr::recode(term,
                              "Explicit_Bias" = "Explicit Bias",
                              "Lib_Con_Ideo" = "Liberal-Conservative Ideo",
                              "Central_WA" = "Central WA")) %>%
  mutate(Model = model)


# NOTE(review): the x scale is limited to [-0.25, 0.25]; ggplot2 drops values
# outside scale limits, so confirm no estimate or interval bound exceeds them.
ggplot(coef_data_combined,
       aes(x = estimate,
           y = factor(term, levels = c("Republican", "Independent",
                                       "Liberal-Conservative Ideo",
                                       "Central WA", "White", "Female", "Age",
                                       "Education", "Income",
                                       "Explicit Bias")))) +
  # Point estimates, dodged so the two models do not overlap
  geom_point(aes(shape = Model, fill = Model), size = 3.5,
             position = position_dodge(width = 0.5), color = "black") +
  # Horizontal 95% confidence intervals
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high, linetype = Model),
                 height = 0.3, position = position_dodge(width = 0.5)) +
  # Reference line at zero
  geom_vline(xintercept = 0, linetype = "dotted", color = "black") +
  labs(x = "Coefficient Estimates with 95% Confidence Intervals",
       y = "Predictor Variables",
       title = "Coefficients for White Signature Acceptance Rate") +
  scale_x_continuous(limits = c(-0.25, 0.25),
                     breaks = seq(-0.25, 0.25, by = 0.05)) +
  scale_shape_manual(values = c("Model 2" = 21, "Model 1" = 24)) +
  scale_fill_manual(values = c("Model 2" = "#E69F00", "Model 1" = "#0072B2")) +
  scale_linetype_manual(values = c("Model 2" = "solid", "Model 1" = "dashed")) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    # The font_add_google() call earlier in the script registers the font
    # under the family name "ebgaramond"; asking for "EB Garamond" does not
    # match the registered family
    text = element_text(family = "ebgaramond", size = 20),
    axis.title = element_text(size = 24, face = "bold"),
    axis.text = element_text(size = 24),
    plot.title = element_text(size = 32, face = "bold", hjust = 0.5)
  )

ggsave("Figure_C2.png", width = 20, height = 10,
       dpi = 300,
       bg = "white")



########################################################################
# Weighted Regression Models Explicit Bias (Hispanics) Variable Models #
########################################################################

#Tables A7-A9

# Same four weighted models as before, with Explicit_Bias_Latino as the bias
# measure. The model objects reuse the earlier names and therefore replace
# them. The raking weights are passed through lm's `weights` argument,
# spelled out in full rather than relying on partial matching of `weight=`.

#Latino Accept Basic Model
Latino_Accept_Basic <- lm(
  Pct_Latino_Accept ~ Explicit_Bias_Latino +
    Female + Age + Education + Income + White + Central_WA,
  data = sig_match_data, weights = weight
)
summary(Latino_Accept_Basic)

#Latino Accept Full Model
Latino_Accept_Full <- lm(
  Pct_Latino_Accept ~ Explicit_Bias_Latino +
    Female + Age + Education + Income + White + Central_WA +
    Republican + Independent + Lib_Con_Ideo,
  data = sig_match_data, weights = weight
)
summary(Latino_Accept_Full)


#White Accept Basic Model
White_Accept_Basic <- lm(
  Pct_White_Accept ~ Explicit_Bias_Latino +
    Female + Age + Education + Income + White + Central_WA,
  data = sig_match_data, weights = weight
)
summary(White_Accept_Basic)

#White Accept Full Model
White_Accept_Full <- lm(
  Pct_White_Accept ~ Explicit_Bias_Latino +
    Female + Age + Education + Income + White + Central_WA +
    Republican + Independent + Lib_Con_Ideo,
  data = sig_match_data, weights = weight
)
summary(White_Accept_Full)


############
# Table A7 #
############
# Row labels shared by Tables A7 and A8, in the order the terms enter the
# full model
tab_labels <- c("Explicit Bias (Hispanic FT)", "Female", "Age", "Education",
                "Income", "White", "Central WA", "Republican", "Independent",
                "Ideology (Lib-Con)")

# Table A7: Hispanic signature acceptance, basic vs. full weighted model
stargazer(Latino_Accept_Basic, Latino_Accept_Full,
          covariate.labels = tab_labels,
          out.header = FALSE,
          dep.var.labels = c("Hispanic Signature Acceptance"),
          omit.stat = c("rsq", "adj.rsq"),
          ci = TRUE, ci.level = 0.95, single.row = TRUE,
          type = "latex",
          out = "table_latino_accept_weighted_hisp_ft.tex",
          label = "table_latino_accept_weighted_hisp_ft.tex",
          star.char = c("*", "**", "***"),
          star.cutoffs = c(.05, .01, .001),
          notes = "$^{***}$p $<$ .001; $^{**}$p $<$ .01; $^{*}$p $<$ .05",
          notes.append = FALSE,
          title = "Explicit Bias (Hispanics) and Hispanic Signature Acceptance (Weighted)")


############
# Table A8 #
############

# Table A8: White signature acceptance, basic vs. full weighted model
stargazer(White_Accept_Basic, White_Accept_Full,
          covariate.labels = tab_labels,
          out.header = FALSE,
          dep.var.labels = c("White Signature Acceptance"),
          omit.stat = c("rsq", "adj.rsq"),
          ci = TRUE, ci.level = 0.95, single.row = TRUE,
          type = "latex",
          out = "table_white_accept_weighted_hisp_ft.tex",
          label = "table_white_accept_weighted_hisp_ft.tex",
          star.char = c("*", "**", "***"),
          star.cutoffs = c(.05, .01, .001),
          notes = "$^{***}$p $<$ .001; $^{**}$p $<$ .01; $^{*}$p $<$ .05",
          notes.append = FALSE,
          title = "Explicit Bias (Hispanics) and White Signature Acceptance (Weighted)")




############
# Table A9 #
############
# Unweighted counterparts of the full models, using Explicit_Bias_Latino
# (no weights argument)

# Hispanic signature acceptance, unweighted
Latino_Accept_no_weight <- lm(
  Pct_Latino_Accept ~ Explicit_Bias_Latino +
    Female + Age + Education + Income + White + Central_WA +
    Republican + Independent + Lib_Con_Ideo,
  data = sig_match_data
)
summary(Latino_Accept_no_weight)


# White signature acceptance, unweighted
White_Accept_no_weight <- lm(
  Pct_White_Accept ~ Explicit_Bias_Latino +
    Female + Age + Education + Income + White + Central_WA +
    Republican + Independent + Lib_Con_Ideo,
  data = sig_match_data
)
summary(White_Accept_no_weight)


# Row labels in the order the terms enter the models
tab_labels <- c("Explicit Bias (Hispanic FT)", "Female", "Age", "Education",
                "Income", "White", "Central WA", "Republican", "Independent",
                "Ideology (Lib-Con)")

# Both unweighted models side by side in one table
stargazer(Latino_Accept_no_weight, White_Accept_no_weight,
          covariate.labels = tab_labels,
          out.header = FALSE,
          dep.var.labels = c("Hispanic Signature Acceptance", "White Signature Acceptance"),
          omit.stat = c("rsq", "adj.rsq"),
          ci = TRUE, ci.level = 0.95, single.row = TRUE,
          type = "latex",
          out = "table_Latino_White_Accept_Full_No_Weight_hisp_ft.tex",
          label = "table_Latino_White_Accept_Full_No_Weight_hisp_ft.tex",
          star.char = c("*", "**", "***"),
          star.cutoffs = c(.05, .01, .001),
          notes = "$^{***}$p $<$ .001; $^{**}$p $<$ .01; $^{*}$p $<$ .05",
          notes.append = FALSE,
          title = "Explicit Bias (Hispanic) and Hispanic and White Signature Acceptance (Unweighted)")


